/****************************************
Table A.15
*****************************************/

* Load the cleaned, collapsed census extract
use "$dir/Data/Final/final_collapsed_clean.dta", clear

* Event-study bookkeeping variables
*   i  : unit identifier (birth state FIPS code)
*   t  : period index; someone who is 10 when the ban is put in place
*        is treated (time 0)
*   Ei : decade when the unit is first treated
*   K  : relative time, i.e. number of decades since treated
*        (could be missing if never-treated)
generate i  = birthstatefip
generate t  = 1 + (birthdecade/10)
generate Ei = floor(year_of_ban/10)
generate K  = t - Ei


* Lead indicators A1event ... A6event: equal to 1 when relative time K is
* minus the loop index, 0 otherwise (including when K is missing)
forvalues lead = 1/6 {
	generate A`lead'event = (K == -`lead')
}

* Lag indicators P0event ... P6event: equal to 1 when relative time K is
* the loop index, 0 otherwise
forvalues lag = 0/6 {
	generate P`lag'event = (K == `lag')
}

* Flag rows whose relative time lies within six decades of treatment on
* either side; rows with missing K are flagged 0
generate insample6treat = !missing(K) & abs(K) <= 6


* Interactions of cmH with the event-time indicators.
* Leads 6 down to 2 are interacted; the lead for one decade before
* treatment (A1event) is not. Capture noisily prints any error from
* generate but lets the do-file continue.
foreach pre of numlist 6 5 4 3 2 {
	capture noisily generate cmH_Pre`pre' = cmH * A`pre'event
}

foreach post of numlist 0 1 2 3 4 5 6 {
	capture noisily generate cmH_Post`post' = cmH * P`post'event
}

* Attach labels. Capture silently skips any index for which the variable
* does not exist (the loops above create no cmH_Pre0 or cmH_Pre1).
forvalues k = 0/6 {
	capture label variable cmH_Pre`k' "Pre `k'"
	capture label variable cmH_Post`k' "Post `k'"
}



* Treat low CM surnames as never having faced a ban
replace Ei = . if cmH == 0

* Never-treated flag: at this point it covers low CM surnames and units
* that never faced a ban
generate nevertreated = (Ei == .)

* Drop all states that banned after 1940 or never banned (a missing
* year_of_ban compares as greater than 1940), so the control group is low
* cousin marriage surnames in states that banned by 1940
drop if year_of_ban > 1940

* Restrict to rows inside the six-decade event window
drop if insample6treat != 1


** Transforming and labelling outcome variables **


* Log transformations (natural log); the levels variable behind ln_inchat
* is dropped once the log has been taken
generate ln_citypop = ln(citypop)
generate ln_occscore = ln(occscore)
generate ln_inchat = ln(inchat_levels)
drop inchat_levels

* Dispersion measures: one minus the corresponding HHI variable
generate occ_disp = 1 - occ_disp_hhi
generate occ_disp_10digit = 1 - occ_disp_hhi_10digit
generate occ_disp_occ_class = 1 - occ_disp_hhi_occ_class
generate geog_disp_state = 1 - geog_disp_hhi_state
generate geog_disp_county = 1 - geog_disp_hhi_county
generate geog_disp_enumdist = 1 - geog_disp_hhi_enumdist

* Complement of the common-name share
generate share_ch_uncmn = 1 - share_ch_cmn


* Scale the genetic outcomes by 10,000. The range gq_inst-genetic_d is
* resolved in dataset variable order.
foreach outcome of varlist(gq_inst-genetic_d) {
	quietly replace `outcome' = 10000 * `outcome'
}

* Variable labels

* Cell identifiers
label variable birthdecade "Birth decade"
label variable cmH ">10% cousin marriage rate (-1858)"
label variable birthstatefip "Birth state"
label variable resstatefip "Residence state"
label variable year "Census year"
label variable count_dor "Num census individuals in cell"

* Economic, residence and migration outcomes
label variable occscore "Occscore"
label variable inchat "LIDO Income (Original)"
label variable ln_inchat "LIDO Income"
label variable citypop "City size"
label variable urban "Urban"
label variable farm "Residence: Farm"
label variable mig_life "Interstate Migration"
label variable mig_life_noban "Interstate Migration (State without ban)"
label variable mig_life_ban "Interstate Migration (State with ban)"
label variable ln_citypop "Urbanization (log residence pop'n)"

* Marriage, household and family outcomes
label variable divorced "Divorced"
label variable mult_genh "Multigenerational HH"
label variable nchild "No. of resident children"
label variable nchlt5 "No. of resident children (under 5)"
label variable agemarr "Age of Marriage"
label variable durmarr "Duration of Current Marriage"
label variable ncouples "No. of couples in unit"
label variable ncouples_wo "No. of couples in unit (w/o own)"
label variable nfams "No. of unrelated families in unit"
label variable famsize "Family Size"
label variable nsibs "No. of siblings in unit"
label variable nm_single "Never Married/Single"

* Dispersion measures: raw HHI variables, then their complements
label variable occ_disp_hhi "Occupational Dispersion HHI"
label variable occ_disp_hhi_10digit "Occupational Dispersion HHI (10-digit codes)"
label variable occ_disp_hhi_occ_class "Occupational Dispersion HHI (Farmer, White Collar, Blue Collar, Self-Employed)"
label variable prank "Income percentile-rank"
label variable geog_disp_hhi_state "Geographical Dispersion (State)"
label variable geog_disp_hhi_county "Geographical Dispersion (County)"
label variable geog_disp_hhi_enumdist "Geographical Dispersion (Enumeration District)"
label variable occ_disp "Occupational Dispersion (HHI)"
label variable occ_disp_10digit "Occupational Dispersion (HHI) (10-digit)"
label variable occ_disp_occ_class "Occupational Dispersion (HHI) - White/Blue Collar, Farmer, Self-Employed"
label variable geog_disp_state "Geographical Dispersion (HHI) - State"
label variable geog_disp_county "Geographical Dispersion (HHI) - County"
label variable geog_disp_enumdist "Geographical Dispersion (HHI) - Enumeration District"

* Genetic outcomes (per 10,000)
label variable gq_inst "Institutionalized, per 10,000"
label variable gq_inst_med "Institutionalized (Medical), per 10,000"
label variable gq_poorhouse "Institutionalized (Poorhouse), per 10,000"
label variable hmem_disable "Household member (Disabled), per 10,000"
label variable blind_deaf "Blind/Deaf, per 10,000"
label variable genetic_d "Institutionalized (Medical) + Disability, per 10,000"

* Children's names
label variable share_ch_cmn "Share childr. with common names"
label variable share_ch_uncmn "Share childr. with uncommon names"

* Same label for every variable in the range ln_occscore_nm-genetic_d_nm
* (resolved in dataset variable order)
foreach countvar of varlist(ln_occscore_nm-genetic_d_nm) {
	label variable `countvar' "Number of individuals with non-missing outcome in cell"
}



* Shrink storage types before merging
compress

** Merge with time-varying data **
* Many-to-one merge on birth state and birth decade. The using file is
* anchored on the dir global, like every other file this script reads or
* writes, so the merge no longer depends on the current working directory.
merge m:1 birthstatefip birthdecade using ///
"$dir/Data/Original/robustness_interactions.dta"

* Keep only rows matched in both datasets, then discard the merge indicator
keep if _merge == 3
drop _merge

* Labels for the robustness variables
label variable state_pc_rail_coverage "Percentage rail road coverage (state)"
label variable minagemarriage "Min. age of marriage (females)"
label variable sterilization_law "Sterilization law in place"
label variable comp_schooling "Comp Schooling Law"
label variable statehood "Part of Union"
label variable pct_stateTFE "Total Frontier Exp (% of State)"
label variable foreign_born "Share foreign-born"
label variable sex_ratio "Sex-ratio"


* Interactions of each robustness variable with the high-cousin marriage dummy
* NOTE(review): the minimum-age interaction is built from minagemarr16,
* while the label above is attached to minagemarriage -- confirm that
* minagemarr16 is the intended variable.
generate cmHR_rc = cmH * state_pc_rail_coverage   // Rail Road Coverage
generate cmHR_magemarr = cmH * minagemarr16       // Min age of marriage
generate cmHR_sl = cmH * sterilization_law        // Sterilization Law
generate cmHR_cs = cmH * comp_schooling           // Compulsory Schooling
generate cmHR_sh = cmH * statehood                // Statehood
generate cmHR_tfe = cmH * pct_stateTFE            // Total Frontier Experience
generate cmHR_sfe = cmH * foreign_born            // Share Foreign Born
generate cmHR_sr = cmH * sex_ratio                // Sex-ratios

* Event-window lengths stored as globals (not referenced in the visible
* part of this script)
global pre_yrs 6
global post_yrs 6


/**************************************
Table A.15
**************************************/

* Column (1): outcome ln_citypop.
* Clear stored estimates, then run eventstudyinteract with cmH and every
* variable matching cmH_P* (both the cmH_Pre and cmH_Post interactions) as
* regressors, analytic weights ln_citypop_nm, absorbed i-by-t, year and
* i-by-cmH effects, the eight cmHR interactions as covariates, standard
* errors clustered on i, cohort variable Ei and control cohort nevertreated.
eststo clear
eventstudyinteract ln_citypop ///
	cmH ///
    cmH_P* ///
	[aweight = 	ln_citypop_nm] , absorb(i.i##i.t i.year i.i##i.cmH) covariates(cmHR_rc cmHR_sl cmHR_cs cmHR_sh cmHR_magemarr cmHR_tfe cmHR_sfe cmHR_sr) ///
	vce(cluster i) ///
	cohort(Ei) control_cohort(nevertreated)
	
	* Covariates
	* Each line prints the coefficient, its standard error and their ratio
	disp _b[cmHR_sl], _se[cmHR_sl], (_b[cmHR_sl]/_se[cmHR_sl])
	disp _b[cmHR_cs], _se[cmHR_cs], (_b[cmHR_cs]/_se[cmHR_cs])
	disp _b[cmHR_sh], _se[cmHR_sh], (_b[cmHR_sh]/_se[cmHR_sh])
	disp _b[cmHR_magemarr], _se[cmHR_magemarr], (_b[cmHR_magemarr]/_se[cmHR_magemarr])
    disp _b[cmHR_rc], _se[cmHR_rc], (_b[cmHR_rc]/_se[cmHR_rc])
	disp _b[cmHR_tfe], _se[cmHR_tfe], (_b[cmHR_tfe]/_se[cmHR_tfe])
	disp _b[cmHR_sfe], _se[cmHR_sfe], (_b[cmHR_sfe]/_se[cmHR_sfe])
	disp _b[cmHR_sr], _se[cmHR_sr], (_b[cmHR_sr]/_se[cmHR_sr])

	
	
	* Copy e(b_iw) and e(V_iw) into matrices and repost them as e(b) and
	* e(V), so that esttab and eststo work from those results.
	* The captured diag() call converts V_iw to a diagonal matrix when
	* e(V_iw) is a vector; if diag() fails, the copy made on the line
	* before it is kept unchanged.
 	matrix b_iw = e(b_iw) // Save the results for table output
 	matrix V_iw = e(V_iw)
	capture matrix V_iw = diag(e(V_iw))
 	erepost b = b_iw V = V_iw // ssc install erepost
 	esttab
	eststo A1
	* Observation count for the table footer: summarize is frequency-weighted
	* by ln_citypop_nm, so r(N) is the sum of those weights over rows with
	* non-missing ln_citypop.
	* NOTE(review): this count is not restricted to e(sample), whereas the
	* mean computed below is -- confirm the difference is intended.
	su ln_citypop [fw = ln_citypop_nm]
	local obs1 = r(N)
	* Weighted mean of the dependent variable over the estimation sample,
	* rounded to two decimals and stored as a string for the footer
    su ln_citypop [aw = ln_citypop_nm] if e(sample) == 1
    local meanA = string(round(r(mean), .01))
 
 
* Column (2): outcome ln_occscore.
* Same specification as the ln_citypop estimation: cmH and every variable
* matching cmH_P* as regressors, analytic weights ln_occscore_nm, absorbed
* i-by-t, year and i-by-cmH effects, the eight cmHR interactions as
* covariates, standard errors clustered on i, cohort variable Ei and
* control cohort nevertreated.
eventstudyinteract ln_occscore ///
	cmH ///
    cmH_P* ///
	[aweight = ln_occscore_nm] , absorb(i.i##i.t i.year i.i##i.cmH) covariates(cmHR_rc cmHR_sl    cmHR_cs cmHR_sh cmHR_magemarr cmHR_tfe cmHR_sfe cmHR_sr) ///
	vce(cluster i) ///
	cohort(Ei) control_cohort(nevertreated)
	
	* Covariates
	* Each line prints the coefficient, its standard error and their ratio
	disp _b[cmHR_sl], _se[cmHR_sl], (_b[cmHR_sl]/_se[cmHR_sl])
	disp _b[cmHR_cs], _se[cmHR_cs], (_b[cmHR_cs]/_se[cmHR_cs])
	disp _b[cmHR_sh], _se[cmHR_sh], (_b[cmHR_sh]/_se[cmHR_sh])
	disp _b[cmHR_magemarr], _se[cmHR_magemarr], (_b[cmHR_magemarr]/_se[cmHR_magemarr])
    disp _b[cmHR_rc], _se[cmHR_rc], (_b[cmHR_rc]/_se[cmHR_rc])
	disp _b[cmHR_tfe], _se[cmHR_tfe], (_b[cmHR_tfe]/_se[cmHR_tfe])
	disp _b[cmHR_sfe], _se[cmHR_sfe], (_b[cmHR_sfe]/_se[cmHR_sfe])
	disp _b[cmHR_sr], _se[cmHR_sr], (_b[cmHR_sr]/_se[cmHR_sr])

	
	* Copy e(b_iw) and e(V_iw) into matrices and repost them as e(b) and
	* e(V), so that esttab and eststo work from those results.
	* The captured diag() call converts V_iw to a diagonal matrix when
	* e(V_iw) is a vector; if diag() fails, the copy made on the line
	* before it is kept unchanged.
 	matrix b_iw = e(b_iw) // Save the results for table output
 	matrix V_iw = e(V_iw)
	capture matrix V_iw = diag(e(V_iw))
 	erepost b = b_iw V = V_iw // ssc install erepost
 	esttab
	eststo A2
	* Observation count for the table footer: summarize is frequency-weighted
	* by ln_occscore_nm, so r(N) is the sum of those weights over rows with
	* non-missing ln_occscore.
	* NOTE(review): this count is not restricted to e(sample), whereas the
	* mean computed below is -- confirm the difference is intended.
	sum ln_occscore [fw = ln_occscore_nm]
	local obs2 = r(N)
	* Weighted mean of the dependent variable over the estimation sample,
	* rounded to two decimals and stored as a string for the footer
    su ln_occscore [aw = ln_occscore_nm] if e(sample) == 1
    local meanB = string(round(r(mean), .01))
 


 * Coefficient rows for stored estimates A1 and A2, written as a TeX fragment.
 * The keep pattern retains only names beginning with cmH_ (the event-time
 * interactions); the cmHR covariate names do not match it, so their labels
 * below are only used if the keep pattern is widened. The label list now
 * covers all eight covariates (the sterilization-law interaction was
 * missing) and the spelling of Foreign-Born is corrected.
 estout A1 A2 using "$dir/Output/TablesFigures/results_main_pint.tex", style(tex) replace ///
    keep(cmH_*) ///
	cells(b(star fmt(%9.4f)) se(par)) ///
	nolabel collabels(none) mlabels(none) starlevels(* 0.10 ** 0.05 *** 0.01) ///
	varlabels(cmH_Pre6 "-6" cmH_Pre5 "-5" ///
	cmH_Pre4 "-4" cmH_Pre3 "-3" ///
	cmH_Pre2 "-2" cmH_Post0 "0" ///
	cmH_Post1 "1" cmH_Post2 "2" ///
	cmH_Post3 "3" cmH_Post4 "4" ///
	cmH_Post5 "5" cmH_Post6 "6" ///
	cmHR_rc "High CM X Rail Coverage" ///
	cmHR_sl "High CM X Sterilization Law" ///
	cmHR_cs "High CM X Compulsory Schooling Law" ///
	cmHR_sh "High CM X Statehood" ///
	cmHR_magemarr "High CM X Min Age of Marriage" ///
	cmHR_tfe "High CM X Total Frontier Experience" ///
	cmHR_sfe "High CM X Share Foreign-Born" ///
	cmHR_sr "High CM X Sex-ratio (Male to Female)")
	
	

* Assemble the TeX footer row by row. obs1, obs2, meanA and meanB are the
* locals stored after each of the two estimations earlier in this script.
local footer " \\ \hline"
local footer "`footer' Observations & `obs1' & `obs2' \\"
local footer "`footer' Mean Dep. Var & `meanA' & `meanB'   \\"
local footer "`footer' State X Birth Decade FE & Yes & Yes \\"
local footer "`footer' State X High CM F.E. & Yes & Yes  \\"
local footer "`footer' Census year F.E. & Yes & Yes  \\"
local footer "`footer' \multicolumn{3}{p{8cm}}{\tiny \textit{Notes:} Standard errors clustered at state-level."
local footer "`footer' *** p<0.01, ** p<0.05, * p<0.1.} \\ \end{tabular} }"

* Write the table header and footer to a second TeX fragment: column
* titles (1) and (2), grouped outcome headings, and the footer built above.
* NOTE(review): d(*) is presumably the abbreviation of drop(*), i.e. no
* coefficient rows are written here -- confirm against the estout help.
esttab A1 A2 using "$dir/Output/TablesFigures/results_main_pinta.tex", ///
	style(tex) replace booktabs ///
	d(*) nolabel collabels(none) noobs ///
	postfoot("`footer'") nonum ///
	mtitles("(1)" "(2)") ///
	mgroups("Urbanization (log residence pop'n)" "Occupational Income (Log)", pattern(1 1) ///
	prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))
	
	

